# -*- coding: utf-8 -*-
import random
from datetime import datetime
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.batch_dao import BatchDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from zc_core.util.encrypt_util import short_uuid
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from hsysmall.rules import parse_item_data, parse_group, parse_order_item


class FullSpider(BaseSpider):
    """Full-catalog spider: walks the whole SKU pool, scraping each SKU's
    item detail page plus its paginated order list.

    NOTE(review): the methods ``parse_item_data`` / ``parse_order_item``
    share names with the module-level helpers imported from
    ``hsysmall.rules``. Inside the method bodies the bare names resolve to
    the imported module-level functions (class attributes do not shadow
    names inside method bodies), so the code is correct — but the
    collision is easy to misread; consider renaming the rule helpers.
    """
    name = 'full'
    # Frequently used endpoints.
    # NOTE(review): item_price_url is not referenced in this file chunk —
    # presumably used elsewhere in the project; verify before removing.
    item_price_url = 'http://119.3.240.114/sync/getGoodsInfoFromShopOnline.do'
    item_url = 'http://119.3.240.114:80/godTotalGoods/web_product_details.do?pkGoodsTotalGoodsId={}'
    order_list_url = 'http://119.3.240.114/godTotalGoods/web_product_details.do?pkGoodsTotalGoodsId={}&page={}&type=2'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Record this batch so downstream consumers can track the run.
        BatchDao().create_batch(self.batch_no)
        # Filter of SKUs already collected in this batch (avoids re-scraping).
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield one item-detail request per eligible SKU in the pool.

        A SKU is skipped when it has been offline longer than
        MAX_OFFLINE_TIME, or when it was already collected in this batch
        (unless FORCE_RECOVER is set).
        """
        settings = get_project_settings()
        # Hoist loop-invariant settings lookups out of the per-SKU loop.
        max_offline_time = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)
        pool_list = SkuPoolDao().get_sku_pool_list()
        self.logger.info('全量: %s' % (len(pool_list)))
        # Shuffle so repeated runs don't always hammer the same SKUs first.
        random.shuffle(pool_list)
        for sku in pool_list:
            sku_id = sku.get('_id')
            # Skip SKUs that have been offline too long (stale entries).
            offline_time = sku.get('offlineTime', 0)
            if offline_time > max_offline_time:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Skip SKUs already collected this batch unless recovery is forced.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: %s', sku_id)
                continue

            # Request the item detail page.
            yield Request(
                url=self.item_url.format(sku_id),
                callback=self.parse_item_data,
                errback=self.error_back,
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id
                },
                dont_filter=True
            )

    def parse_item_data(self, response):
        """Parse an item detail page.

        Yields, in order: the group relation (when non-empty), the item
        data itself, any orders embedded on this page, and a follow-up
        request for the next order page when pagination continues.
        """
        meta = response.meta
        sku_id = meta.get('skuId')
        # Synthesize a virtual group (SPU) id shared by group and item.
        spu_id = short_uuid()
        # Item grouping relationship — only yielded when it has members.
        group = parse_group(response)
        if group and group.get('skuIdList'):
            group['spuId'] = spu_id
            yield group

        # Item data (module-level parse_item_data from hsysmall.rules).
        data = parse_item_data(response)
        data['spuId'] = spu_id
        self.logger.info('商品: [%s]' % data.get('skuId'))
        yield data

        # First page of the order list is embedded in the detail page.
        curr_page = meta.get('orderPage', 1)
        orders, next_page = parse_order_item(response)
        if orders:
            self.logger.info('订单1: sku=%s, page=%s, cnt=%s' % (sku_id, curr_page, len(orders)))
            for order in orders:
                yield order

            # Follow pagination; -1 marks the last page.
            if next_page and next_page != -1:
                yield Request(
                    url=self.order_list_url.format(sku_id, next_page),
                    callback=self.parse_order_item,
                    errback=self.error_back,
                    meta={
                        'reqType': 'item',
                        'batchNo': self.batch_no,
                        'skuId': sku_id,
                        'orderPage': next_page
                    },
                    priority=100,
                    dont_filter=True
                )

    def parse_order_item(self, response):
        """Parse one page of an order list and follow pagination."""
        meta = response.meta
        curr_page = meta.get('orderPage')
        sku_id = meta.get('skuId')

        # Orders on this page (module-level parse_order_item helper).
        orders, next_page = parse_order_item(response)
        if orders:
            self.logger.info('订单2: sku=%s, page=%s, cnt=%s' % (sku_id, curr_page, len(orders)))
            for order in orders:
                yield order

            # Follow pagination; -1 marks the last page.
            if next_page and next_page != -1:
                yield Request(
                    url=self.order_list_url.format(sku_id, next_page),
                    callback=self.parse_order_item,
                    errback=self.error_back,
                    meta={
                        'reqType': 'item',
                        'batchNo': self.batch_no,
                        'skuId': sku_id,
                        'orderPage': next_page
                    },
                    priority=300,
                    dont_filter=True
                )
